Home Work 5 - bakharia

Since 2008, guests and hosts have used Airbnb to expand their travel possibilities and to offer a more unique, personalized way of experiencing the world. This dataset describes the listing activity and metrics in NYC, NY for 2019. The data file includes all the information needed to find out more about hosts and geographical availability, along with the metrics necessary to make predictions and draw conclusions.

Shubham Mishra
08-23-2021
Reading the dataset and rearranging the columns
# Load the NYC 2019 listings and move the coordinate columns to the front.
# Columns not named here keep their relative order after the listed ones.
ab_nyc <- read.csv("../../_data/AB_NYC_2019.csv") %>%
  relocate(
    c(
      "latitude", "longitude",
      "id", "name",
      "host_id", "host_name",
      "neighbourhood_group", "neighbourhood",
      "room_type", "minimum_nights",
      "number_of_reviews", "last_review", "reviews_per_month",
      "calculated_host_listings_count", "availability_365"
    )
  )

# Preview the first rows to check the new column order.
head(ab_nyc)
  latitude longitude   id
1 40.64749 -73.97237 2539
2 40.75362 -73.98377 2595
3 40.80902 -73.94190 3647
4 40.68514 -73.95976 3831
5 40.79851 -73.94399 5022
6 40.74767 -73.97500 5099
                                              name host_id
1               Clean & quiet apt home by the park    2787
2                            Skylit Midtown Castle    2845
3              THE VILLAGE OF HARLEM....NEW YORK !    4632
4                  Cozy Entire Floor of Brownstone    4869
5 Entire Apt: Spacious Studio/Loft by central park    7192
6        Large Cozy 1 BR Apartment In Midtown East    7322
    host_name neighbourhood_group neighbourhood       room_type
1        John            Brooklyn    Kensington    Private room
2    Jennifer           Manhattan       Midtown Entire home/apt
3   Elisabeth           Manhattan        Harlem    Private room
4 LisaRoxanne            Brooklyn  Clinton Hill Entire home/apt
5       Laura           Manhattan   East Harlem Entire home/apt
6       Chris           Manhattan   Murray Hill Entire home/apt
  minimum_nights number_of_reviews last_review reviews_per_month
1              1                 9  2018-10-19              0.21
2              1                45  2019-05-21              0.38
3              3                 0                            NA
4              1               270  2019-07-05              4.64
5             10                 9  2018-11-19              0.10
6              3                74  2019-06-22              0.59
  calculated_host_listings_count availability_365 price
1                              6              365   149
2                              2              355   225
3                              1              365   150
4                              1              194    89
5                              1                0    80
6                              1              129   200
Finding the least expensive possible stay at each bnb by multiplying the price per night by the minimum number of nights required
# Build a point layer with one feature per listing. The only attribute is the
# cheapest possible stay: nightly price times the minimum number of nights.
min_cost <- ab_nyc %>%
  transmute(
    lat = latitude,
    lon = longitude,
    `Minimum Cost of Stay` = minimum_nights * price
  ) %>%
  st_as_sf(
    # st_as_sf() takes the x coordinate first, hence longitude before latitude.
    coords = c("lon", "lat"),
    crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
  )
Using the quartiles reported by summary to assign an affordability class to each place (#1: <= 135, #2: <= 300, #3: <= 734, #4: > 734)
# Print the distribution of the minimum cost of stay; its quartiles are the
# cut-offs quoted for the affordability classes.
summary(min_cost)
 Minimum Cost of Stay          geometry    
 Min.   :      0.0    POINT        :48895  
 1st Qu.:    135.0    epsg:NA      :    0  
 Median :    300.0    +proj=long...:    0  
 Mean   :   1284.4                         
 3rd Qu.:    734.5                         
 Max.   :1170000.0                         
# Keep the printed summary table available under its original name.
info <- summary(min_cost)

# Take the quartile cut-offs from the data rather than parsing the formatted
# summary strings. A digit regex on "1st Qu.: 135.0" / "3rd Qu.: 734.5" matches
# the leading "1" / "3" of the label, not the value, which gave thresholds of
# 1 and 3 and left class 3 unreachable.
cost_quartiles <- quantile(
  min_cost[["Minimum Cost of Stay"]],
  probs = c(0.25, 0.50, 0.75),
  na.rm = TRUE,
  names = FALSE
)

# Affordability class: 1 = up to Q1, 2 = up to the median, 3 = up to Q3,
# 4 = above Q3. Rows with a missing cost stay NA.
min_cost <- min_cost %>%
  mutate(Affordability = case_when(
    `Minimum Cost of Stay` <= cost_quartiles[1] ~ 1,
    `Minimum Cost of Stay` <= cost_quartiles[2] ~ 2,
    `Minimum Cost of Stay` <= cost_quartiles[3] ~ 3,
    `Minimum Cost of Stay` > cost_quartiles[3] ~ 4
  ))
Mapping all the places by affordability with the mapview library, using the latitude and longitude provided in the dataset
# mapview supplies the mapview() call used below.
library(mapview)
# Keep only the point geometry and the affordability class, then map the
# listings with zcol selecting Affordability as the column to colour by.
min_cost %>%
  select(geometry, Affordability) %>%
  mapview(zcol = "Affordability")
Counting the listings per host to find the hosts who own the most bnbs in the city
                  #as.data.frame(table(ab_nyc$host_id)) %>% `colnames<-`(c("host_id","No. of Places"))
# Number of listings per host, one row per host_id / host_name pair.
# count() names the tally column directly and returns ungrouped output, so no
# `colnames<-` rename is needed and no implicit-grouping message is emitted.
no_of_places <- ab_nyc %>%
  count(host_id, host_name, name = "No. of Places") %>%
  as.data.frame()

# Bar chart of the ten hosts with the most listings. Filling by host ID keeps
# hosts that share a name distinguishable.
no_of_places %>%
  arrange(desc(`No. of Places`)) %>%
  slice_head(n = 10) %>%
  ggplot(aes(host_name, `No. of Places`, fill = as.character(host_id))) +
  geom_col() +
  theme_minimal() +
  coord_flip() +
  labs(x = "Host Name", fill = "Host ID")

Finding the relation between the number of listings at a place and the experience of its owner (based on the number of places owned). It is found that avg_listings is higher for experienced owners (those having multiple bnbs). The graph remains the same as the one above.
# Average calculated_host_listings_count per place for each host, top ten.
# The per-host total is computed in one grouped pass and joined on host_id.
# This replaces an sapply() that rescanned all of ab_nyc once per host and
# yields the same `listings` column.
no_of_places %>%
  left_join(
    ab_nyc %>%
      group_by(host_id) %>%
      summarise(
        listings = sum(calculated_host_listings_count),
        .groups = "drop"
      ),
    by = "host_id"
  ) %>%
  mutate(avg_listings = listings / `No. of Places`) %>%
  arrange(desc(avg_listings)) %>%
  slice_head(n = 10) %>%
  ggplot(aes(host_name, avg_listings, fill = as.character(host_id))) +
  geom_col() +
  theme_minimal() +
  labs(x = "Host Name", y = "Avg Listings", fill = "Host ID") +
  coord_flip()

On categorising on the basis of privacy (Shared room ~ Shared, Private room ~ Room, Entire home/apt ~ House/Apt), it is found that most bnbs offer the entire place.
# Collapse room_type to a short privacy label keyed on its first word, then
# count listings per label. Unrecognised room types become NA.
ab_nyc %>%
  mutate(Type = {
    first_word <- str_extract(room_type, "[^\\s]+")
    case_when(
      first_word == "Shared" ~ "Shared",
      first_word == "Private" ~ "Room",
      first_word == "Entire" ~ "House/Apt"
    )
  }) %>%
  ggplot(aes(Type, fill = Type)) +
  geom_bar() +
  theme_minimal()

On categorising all the bnbs by the neighbourhood group that they are in, it is found that Manhattan has the largest number of bnbs
# Listings per neighbourhood group, drawn from most to fewest.
# ggplot2 orders a character axis alphabetically, so arranging the rows does
# not change the bar order. The order is set by turning the group into a
# factor whose levels are sorted by descending count.
ab_nyc %>%
  count(neighbourhood_group, name = "Count") %>%
  mutate(neighbourhood_group = reorder(neighbourhood_group, -Count)) %>%
  ggplot(aes(neighbourhood_group, Count, fill = neighbourhood_group)) +
  geom_col() +
  theme_minimal()